# citizen forecasting 2020: a state-by-state experiment
# andreas murr & mike lewis-beck
# prepare AnesInterestNationState.RData

# working memory is deliberately not cleared here
# rm(list = ls()) would delete the caller's objects whenever this file is
# source()d, while still leaving loaded packages and options in place, so it
# does not give a clean start. every object used below is created by this
# script itself; run it in a fresh r session for a reproducible result.

# load packages

library(foreign)
library(arm)

# ======
# = ts =
# ======

# read the anes cumulative data file, keeping the raw numeric codes

Anes <- read.dta("../data/anes_cdfdta/anes_cdf.dta", convert.factors = FALSE)

# year and state are copied over unchanged

Anes$year <- Anes$VCF0004
Anes$state <- Anes$VCF0901b

# interest
# codes 1, 2 and 3 of VCF0310 are kept as they are, anything else is missing

Anes$interest <- c(1, 2, 3)[match(Anes$VCF0310, c(1, 2, 3))]

# forecast
# 1 = dem, 2 = rep, 3 = oth, 8 = dk, 9 = ref

# state forecast: VCF9028 is taken without any recoding

Anes$forecast.state <- Anes$VCF9028

# national forecast: VCF0700 codes 1, 2 and 8 are kept, 7 becomes 3,
# anything else is missing

Anes$forecast.nation <- c(1, 2, 3, 8)[match(Anes$VCF0700, c(1, 2, 7, 8))]

# save data

# earlier variant that restricted the extract to selected years:
# Anes2008 = Anes[Anes$year %in% c(1952, seq(1972, 1996, 4), 2004, 2008), c("year", "state", "forecast.state", "forecast.nation")]

# NOTE(review): all rows are kept here (no year filter); if this file also
# holds 2012 or 2016 respondents they are stacked a second time when the
# yearly extracts are combined below - confirm against the data release used

Anes2008 <- Anes[
  c("year", "state", "interest", "forecast.state", "forecast.nation")
]

# ========
# = 2012 =
# ========

# read the 2012 time series file, keeping the raw numeric codes

Anes <- read.dta("../data/anes2012TS_dta/anes2012TS_dataset.dta", convert.factors = FALSE)

# year is constant for this file, state comes from sample_state

Anes$year <- 2012
Anes$state <- Anes$sample_state

# interest
# interest_following is flipped (1 -> 3, 2 -> 2, 3 -> 1), anything else is
# missing
# presumably this aligns the direction with the cumulative file - confirm

Anes$interest <- c(3, 2, 1)[match(Anes$interest_following, c(1, 2, 3))]

# forecast
# 1 = dem, 2 = rep, 3 = oth, 8 = dk, 9 = ref

# state forecast: start from the raw preswin_state codes and overwrite only
# 5, -8 and -9; all other raw values are carried over as they are

Anes$forecast.state <- Anes$preswin_state
Anes$forecast.state[which(Anes$preswin_state == 5)] <- 3
Anes$forecast.state[which(Anes$preswin_state == -8)] <- 8
Anes$forecast.state[which(Anes$preswin_state == -9)] <- 9

# national forecast: only the listed preswin_win codes are mapped,
# anything else is missing

Anes$forecast.nation <- c(1, 2, 3, 8, 9)[
  match(Anes$preswin_win, c(1, 2, 5, -8, -9))
]

# save data

Anes2012 <- Anes[
  c("year", "state", "interest", "forecast.state", "forecast.nation")
]

# ========
# = 2016 =
# ========

# read the 2016 time series file, keeping the raw numeric codes

Anes <- read.dta("../data/anes_timeseries_2016_dta/anes_timeseries_2016_Stata12.dta", convert.factors = FALSE)

# year is constant for this file, state comes from V163001b

Anes$year <- 2016
Anes$state <- Anes$V163001b

# interest
# V161004 is flipped (1 -> 3, 2 -> 2, 3 -> 1), anything else is missing

Anes$interest <- c(3, 2, 1)[match(Anes$V161004, c(1, 2, 3))]

# forecast
# 1 = dem, 2 = rep, 3 = oth, 8 = dk, 9 = ref
# NOTE(review): in both forecasts raw codes 3 and 4 are folded into 1 and 2,
# 5 and 6 into 3, and 7, 8 and -8 into 8 - confirm against the 2016 codebook

# state forecast: start from the raw V161148 codes and overwrite the listed
# ones; raw values that are not listed (including 1 and 2) are carried over

Anes$forecast.state <- Anes$V161148
Anes$forecast.state[which(Anes$V161148 == 3)] <- 1
Anes$forecast.state[which(Anes$V161148 == 4)] <- 2
Anes$forecast.state[Anes$V161148 %in% c(5, 6)] <- 3
Anes$forecast.state[Anes$V161148 %in% c(7, 8, -8)] <- 8
Anes$forecast.state[Anes$V161148 %in% c(9, -9)] <- 9

# national forecast: same mapping applied to V161146, but values that are
# not listed end up missing instead of being carried over

Anes$forecast.nation <- NA
Anes$forecast.nation[Anes$V161146 %in% c(1, 3)] <- 1
Anes$forecast.nation[Anes$V161146 %in% c(2, 4)] <- 2
Anes$forecast.nation[Anes$V161146 %in% c(5, 6)] <- 3
Anes$forecast.nation[Anes$V161146 %in% c(7, 8, -8)] <- 8
Anes$forecast.nation[Anes$V161146 %in% c(9, -9)] <- 9

# save data

Anes2016 <- Anes[
  c("year", "state", "interest", "forecast.state", "forecast.nation")
]

# =====================
# = combine anes data =
# =====================

# stack the cumulative, 2012 and 2016 extracts on top of each other

Anes <- rbind(Anes2008, Anes2012, Anes2016)
# Anes$state = as.factor(Anes$state)

# turn both forecast columns into labelled factors
# codes outside 1, 2, 3, 8, 9 become missing; note that code 9 gets the
# text label "NA", which is a regular level and not a missing value

Anes[c("forecast.state", "forecast.nation")] <- lapply(
  Anes[c("forecast.state", "forecast.nation")],
  factor,
  levels = c(1:3, 8:9),
  labels = c("d", "r", "o", "dk", "NA")
)

# =============
# = save data =
# =============

# the object is stored under the name AnesInterestNationState

AnesInterestNationState <- Anes
save(AnesInterestNationState, file = "../data/AnesInterestNationState.RData")

# ==================================================================
# = proportion of forecasts that are the same for state and nation =
# ==================================================================

# keep respondents who named "d" or "r" on both the state and the national
# question, and flag whether the two answers coincide

major <- c("d", "r")
both_major <- Anes[
  Anes$forecast.state %in% major & Anes$forecast.nation %in% major,
]
same_party <- both_major$forecast.state == both_major$forecast.nation

# overall share of identical forecasts

mean(same_party)

# smallest and largest yearly share, in whole percent

range(round(tapply(same_party, both_major$year, mean) * 100))

# ============================
# = proportion of interested =
# ============================

# note: despite the heading, both figures are means of the 1-3 interest
# score (missing values dropped), not shares of respondents

round(mean(Anes$interest, na.rm = TRUE), 1)

# smallest and largest yearly mean

yearly_interest <- tapply(Anes$interest, Anes$year, mean, na.rm = TRUE)
round(range(yearly_interest, na.rm = TRUE), 1)

# m = as.matrix(round(prop.table(table(Anes$interest, Anes$year), 2)*100, 0))
#
# plot(sort(unique(Anes$year)), m[3,], type="l", lty=2)



# ===================
# = end source code =
# ===================